{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "特征选择"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 19)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.feature_selection import VarianceThreshold\n",
    "\n",
    "X = np.array([\n",
    "    [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0.697, 0.460],\n",
    "    [2, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0.774, 0.376],\n",
    "    [3, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0.666, 0.091],\n",
    "    [4, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0.245, 0.057],\n",
    "])\n",
    "X.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 1.   ,\n",
       "        0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 0.697, 0.46 ],\n",
       "       [2.   , 1.   , 0.   , 0.   , 0.   , 0.   , 1.   , 1.   , 0.   ,\n",
       "        0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 0.774, 0.376],\n",
       "       [3.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,\n",
       "        0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.666, 0.091],\n",
       "       [4.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 0.   , 0.   ,\n",
       "        1.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.245, 0.057]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "XX = VarianceThreshold(threshold=0.01).fit_transform(X)\n",
    "XX"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 18)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "XX.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 8.        ,  0.        ,  1.        ,  1.        ,  1.        ,\n",
       "        1.        ,  0.        ,  1.        ,  1.        ,  1.        ,\n",
       "        1.        ,  1.        ,  1.        ,  1.71207075, 57.64052606])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.feature_selection import SelectKBest, f_classif\n",
    "X = np.array([\n",
    "    [1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0.697, 0.460],\n",
    "    [2, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0.774, 0.376],\n",
    "    [3, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0.666, 0.091],\n",
    "    [4, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0.245, 0.057],\n",
    "])\n",
    "y = [1, 1, 0, 0]\n",
    "\n",
    "sk = SelectKBest(f_classif)\n",
    "sk.fit_transform(X, y)\n",
    "sk.scores_\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1.6       , 0.        , 1.        , 1.        , 1.        ,\n",
       "       1.        , 2.        , 0.        , 1.        , 1.        ,\n",
       "       1.        , 2.        , 1.        , 2.        , 1.        ,\n",
       "       1.        , 0.        , 0.13165407, 0.48104065])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.feature_selection import SelectKBest, chi2\n",
    "\n",
    "X = np.array([\n",
    "    [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0.697, 0.460],\n",
    "    [2, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0.774, 0.376],\n",
    "    [3, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0.666, 0.091],\n",
    "    [4, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0.245, 0.057],\n",
    "])\n",
    "y = [1, 1, 0, 0]\n",
    "\n",
    "sk = SelectKBest(chi2)\n",
    "sk.fit_transform(X, y)\n",
    "sk.scores_\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.58333333, 0.        , 0.        , 0.20833333, 0.        ,\n",
       "       0.        , 0.83333333, 0.        , 0.        , 0.20833333,\n",
       "       0.        , 0.83333333, 0.20833333, 0.83333333, 0.08333333,\n",
       "       0.        , 0.        , 0.        , 0.83333333])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn.feature_selection import SelectKBest, mutual_info_classif\n",
    "\n",
    "X = np.array([\n",
    "    [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0.697, 0.460],\n",
    "    [2, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0.774, 0.376],\n",
    "    [3, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0.666, 0.091],\n",
    "    [4, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0.245, 0.057],\n",
    "])\n",
    "y = [1, 1, 0, 0]\n",
    "\n",
    "sk = SelectKBest(mutual_info_classif)\n",
    "sk.fit_transform(X, y)\n",
    "sk.scores_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([-0.89442719,  0.        , -0.57735027,  0.57735027, -0.57735027,\n",
       "       -0.57735027,  1.        ,  0.        ,  0.57735027, -0.57735027,\n",
       "       -0.57735027,  1.        , -0.57735027,  1.        , -0.57735027,\n",
       "       -0.57735027,  0.67912971,  0.9830899 ,  1.        ])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "X = np.array([\n",
    "    [1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0.697, 0.460, 1],\n",
    "    [2, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0.774, 0.376, 1],\n",
    "    [3, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0.666, 0.091, 0],\n",
    "    [4, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0.245, 0.057, 0],\n",
    "])\n",
    "corr = np.corrcoef(X, rowvar=False)\n",
    "corr[-1,:]"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "e7370f93d1d0cde622a1f8e1c04877d8463912d04d973331ad4851f04de6915a"
  },
  "kernelspec": {
   "display_name": "Python 3.9.7 64-bit",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
